##########################################
### Analysis: Sensitivity Checks ######
##########################################

## Purpose: This script includes the 
## visuals and models used to conduct various
## sensitivity checks. 
## Data In: 
## 1) data_files/datafull_5_6_2025.rds
## 2) data_files/anes_timeseries_cdf_stata_20220916.dta
## 3) data_files/Frank_WID_2015.dta
## 4) "data_files/Frank_Gini_2015.dta
## Data Out: 
## figures/question_place_percent.pdf 
## figures/question_place_comparison_comparison.pdf 
## figures/phi_racial_distributional 
## figures/overall_plot_racial.pdf 
## figures/racial_gains_party_comparison.pdf 
## figures/party_percent.pdf 
## figures/cohort_time_alt.pdf 
## figures/alienation_comparison.pdf 
## figures/logit_comparison_party.pdf 
## Software Versions: 

## R version 4.1.2

library(RColorBrewer) ## 1.1.3
library(tidyverse) ## 2.0.0
library(viridis) ## 0.6.5
library(MASS) ## 7.3.60.2
library(stargazer) ## 5.2.3
library(readstata13) ## 0.10.0
library(openxlsx) ## 4.2.6.1

## set working directory to Replication package

data.full <- readRDS("data_files/datafull_5_6_2025.rds")

## Before running this code, download the ANES data file
## following instructions in the README and save the .dta
## file in the "data_files" folder
anes <- read.dta13("data_files/anes_timeseries_cdf_stata_20220916.dta")

## Before running this code, download the WID and Gini files
## following instructions in the README for downloading
## the United States state level gini and top income share
## datasets. Save the two files in the "data_files"
## folder. 
income_share_data <- read.dta13("data_files/Frank_WID_2015.dta") 
gini_data <- read.dta13("data_files/Frank_Gini_2015.dta")



## colorblind palette for figures
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", 
                "#009E73", "#F0E442", "#0072B2",
                "#D55E00", "#CC79A7")


#############################
### Cleaning and Functions ##
#############################

## limit anes to relevant variables
## VCF0009z - appropriate weight for combined
## sample, party ID question (see the codebook and appendices
## for information on weights)
## VCF0302 - party ID of respondent 
## VCF0004 - year 

anes <- anes %>%
  dplyr::select(VCF0004, VCF0302, VCF0009z)
colnames(anes) <- c("Year", "Party_ID", "Weight")

## this function calculates s.e. and point estimates
## across simulation draws, used below

estimates.fct <- function(frame){
  est <- as.data.frame(matrix(NA, ncol = 3, nrow = ncol(frame)))
  colnames(est) <- c("Est", "Lower", "Upper")
  est$Est <- base::apply(frame, 2, function(x){mean(x)})
  est$Lower <- base::apply(frame, 2, function(x){quantile(x, .025)})
  est$Upper <- base::apply(frame, 2, function(x){quantile(x, .975)})
  return(est)
}

## weighted proportion
weighted_proportion <- function(vec, value, weight){
  num <- (vec == value)*weight
  num <- sum(num)
  denom <- sum(weight)
  prop <- num / denom
  return(prop)
}

#####################################
## sensitivity check - glm ##########
#####################################

## We examine the sensitivity of our main 
## results to using a logit specification rather than 
## OLS in the SI, Figure G.1

## in the code below we calculate 
## estimated values: average values of our
## outcome variable at different levels of our
## predictor variables, holding all other variables
## constant.
## We use our estimated regression coefficients to calculate
## these estimated values. 
## We estimate uncertainty around
## the estimated values using a parametric bootstrap. 
## see the 03_analysis_full_data_ev.R for more 
## explanation of our simulations for expected values. 

glm1 <- glm(inequality.bin ~ year_5*party.harm + race2 + ses + cohort2 + gender, data = data.full,
            family = "binomial")
lm1 <- lm(inequality.num ~ year_5*party.harm + race2 + ses + cohort2 + gender, data = data.full)

vcov <- vcov(glm1)
vcov2 <- vcov(lm1)
betas <- glm1$coefficients
betas2 <- lm1$coefficients

set.seed(08540)
beta.draws <- MASS::mvrnorm(10000, mu = betas, Sigma = vcov)
beta.draws2 <- MASS::mvrnorm(10000, mu = betas2, Sigma = vcov2)

## create matrix 
matrix <- matrix(0, nrow = 30, 
                 ncol = length(beta.draws[1,]))
matrix[,1] <- 1
colnames(matrix) <- names(beta.draws[1,])
rownames(matrix) <- 1:nrow(matrix)

## year - different year for each level
## main year effects:

## for each level:
## each level gets a row of zeros (for first 1966-1970)
## and then a single year 

for(i in 2:10){
  matrix[i, i] <- 1
  matrix[i+10, i] <- 1
  matrix[i+20, i] <- 1
} 

## year*party interaction effects:
for(i in 2:10){
  matrix[i+10, i + 22] <- 1
  matrix[i+20, i + 31] <- 1
}


## main party effects:
matrix[11:20, "party.harmDemocrat"] <- 1
matrix[21:30, "party.harmIndep/Other"] <- 1

## other variables
## set at Boomers
## white
## ses: bin 3 (not ordered factor)
matrix[,"race2Yes"] <- 1
matrix[,"ses3"] <- 1
matrix[,"gendermale"] <- 1


## for each draw of coefficients from the
## estimated distribution, we calculate
## estimated values using our matrix and the logit
## inverse link function (to put our estimated
## values on the predicted probability scale)

sim <- base::apply(beta.draws, 1, function(x){
  vector <- matrix %*% x
  
  ## inverse link
  pi <- 1 / (1 + exp(-(vector)))
  
  return(pi)
})
sim <- t(sim)
sim <- as.data.frame(sim)

## calculate 2.5th and 97.5th percentile
## of estimated values for confidence intervals
logistic_estimates <- estimates.fct(sim)

logistic_estimates$year <- rep(levels(data.full$year_5), 3)
logistic_estimates$value <- c(rep(levels(data.full$party.harm)[1], 10), 
                           rep(levels(data.full$party.harm)[2], 10),
                           rep(levels(data.full$party.harm)[3], 10))

## ols estiamted values
sim2 <- base::apply(beta.draws2, 1, function(x){
  vector <- matrix %*% x
  
  return(vector)
})
sim2 <- t(sim2)
sim2 <- as.data.frame(sim2)
ols_estimates <- estimates.fct(sim2)

ols_estimates$year <- rep(levels(data.full$year_5), 3)
ols_estimates$value <- c(rep(levels(data.full$party.harm)[1], 10), 
                              rep(levels(data.full$party.harm)[2], 10),
                              rep(levels(data.full$party.harm)[3], 10))


logistic_estimates$model <- "Logit"
ols_estimates$model <- "OLS"

frame <- bind_rows(logistic_estimates,
                  ols_estimates)

frame$year_date <- gsub("-[0-9]{4}",
                                  "", 
                        frame$year)
frame$year_date <- as.Date(paste0(frame$year_date,
                                            "-01-01",
                                            sep = ""))

ggplot(frame,
       aes(x = year_date,
           y = Est,
           color = value,
           group = value,
           shape = value,
           ymin = Lower,
           ymax = Upper)) +
  geom_point() +
  geom_errorbar(width = 0) +
  geom_line() +
  facet_wrap(~ model) +
  scale_color_manual(values = c("#2c7fb8",
                                "#31a354",
                                "#f03b20")) +
  scale_y_continuous(limits = c(0,1),
                     labels = scales::percent_format()) +
  theme_classic() +
  labs(x = NULL,
       y = "Estimated Percent of Respondents\nWho Think 'Rich Getting Richer'",
       color = "Party ID",
       shape = "Party ID") +
  theme(legend.position = "bottom")
## saved as figures/logit_comparison_party.pdf
## 5x7


##############################################
## Include Alienation Index ##################
##############################################

## We recreate our estimated values plot
## for the main results, Figure 2, but controlling
## for alienation index
## These results are included in the SI
## Figure G.2

lm1 <- lm(inequality.num ~ year_5*party.harm + race2 + ses + cohort2 + gender, 
          data = data.full[!is.na(data.full$alienation), ])
lm2 <- lm(inequality.num ~ year_5*party.harm + race2 + ses + cohort2 + gender + alienation, 
          data = data.full[!is.na(data.full$alienation), ])

vcov <- vcov(lm1)
vcov2 <- vcov(lm2)
betas <- lm1$coefficients
betas2 <- lm2$coefficients


matrix_alienation <- matrix(0, nrow = 30, 
                 ncol = length(betas2))
matrix_alienation[,1] <- 1
colnames(matrix_alienation) <- names(betas2)
rownames(matrix_alienation) <- 1:nrow(matrix_alienation)

## year - different year for each level
## main year effects:

## for each level:
## each level gets a row of zeros (for first 1966-1970)
## and then a single year 

for(i in 2:10){
  matrix_alienation[i, i] <- 1
  matrix_alienation[i+10, i] <- 1
  matrix_alienation[i+20, i] <- 1
} 

## year*party interaction effects:
for(i in 2:10){
  matrix_alienation[i+10, i + 25] <- 1
  matrix_alienation[i+20, i + 34] <- 1
}


## main party effects:
matrix_alienation[11:20, "party.harmDemocrat"] <- 1
matrix_alienation[21:30, "party.harmIndep/Other"] <- 1

## other variables
## set at Boomers
## white
## ses: bin 3 (not ordered factor)
## alienation set at 2
matrix_alienation[,"race2Yes"] <- 1
matrix_alienation[,"ses3"] <- 1
matrix_alienation[, "alienationOne_Q"] <- 1
matrix_alienation[,"gendermale"] <- 1

set.seed(08540)
beta.draws <- MASS::mvrnorm(10000, mu = betas, Sigma = vcov)
beta.draws2 <- MASS::mvrnorm(10000, mu = betas2, Sigma = vcov2)

## for each draw of coefficients from the
## estimated distribution, we calculate
## estimated values 

sim1 <- base::apply(beta.draws, 1, function(x){
  vector <- matrix %*% x
  
  return(vector)
})
sim1 <- t(sim1)
sim1 <- as.data.frame(sim1)

## calculate 2.5th and 97.5th percentile
## of estimated values for confidence intervals
lm1_estimates <- estimates.fct(sim1)

lm1_estimates$year <- rep(levels(data.full$year_5), 3)
lm1_estimates$value <- c(rep(levels(data.full$party.harm)[1], 10), 
                         rep(levels(data.full$party.harm)[2], 10),
                         rep(levels(data.full$party.harm)[3], 10))


## repeat for other model specification 
sim2 <- base::apply(beta.draws2, 1, function(x){
  vector <- matrix_alienation %*% x
  
  return(vector)
})
sim2 <- t(sim2)
sim2 <- as.data.frame(sim2)
lm2_estimates <- estimates.fct(sim2)

lm2_estimates$year <- rep(levels(data.full$year_5), 3)
lm2_estimates$value <- c(rep(levels(data.full$party.harm)[1], 10), 
                         rep(levels(data.full$party.harm)[2], 10),
                         rep(levels(data.full$party.harm)[3], 10))


lm2_estimates$model <- "Controlling for Alienation"
lm1_estimates$model <- "Not Controlling for Alienation"

frame <- bind_rows(lm2_estimates,
                   lm1_estimates)

frame$year_date <- gsub("-[0-9]{4}",
                        "", 
                        frame$year)
frame$year_date <- as.Date(paste0(frame$year_date,
                                  "-01-01",
                                  sep = ""))

ggplot(frame,
       aes(x = year_date,
           y = Est,
           color = value,
           group = value,
           shape = value,
           ymin = Lower,
           ymax = Upper)) +
  geom_point() +
  geom_errorbar(width = 0) +
  geom_line() +
  facet_wrap(~ model) +
  scale_color_manual(values = c("#2c7fb8",
                                "#31a354",
                                "#f03b20")) +
  scale_y_continuous(limits = c(0,1),
                     labels = scales::percent_format()) +
  theme_classic() +
  labs(x = NULL,
       y = "Estimated Percent of Respondents\nWho Think 'Rich Getting Richer'",
       color = "Party ID",
       shape = "Party ID") +
  theme(legend.position = "bottom")
## saved as figures/alienation_comparison.pdf
## 5x7

sum(is.na(data.full$alienation)) ## 6,269

############################################
## question wording ##########################
##############################################

## In this section investigate whether question wording is
## associated with respondents’ agreement with the question 
## that the “rich are getting richer.” There were slight 
## variations across the Pew and Harris surveys with this
## question, which are discussed in the SI, section H.
## We test this association by modeling our outcome variable
## (rich getting richer) on question version. 


## here we collapse two versions of the question
## 1) harris version with options agree, don't agree, not sure
## 2) harris version with options feel, don't feel, not sure
## We do this because there are only 150 respondents in the first category

data.full$inequality.variable <- dplyr::recode(data.full$inequality.variable,
                                               `Harris_agree__don't_agree__not_sure` =
                                                 "Harris_feel__don't_feel__not_sure")


check1 <- lm(inequality.num ~ factor(year)  + inequality.variable, data = data.full)                 
check2 <- lm(inequality.num ~ factor(year) + inequality.variable +
               party.harm + cohort2 +
               ses + race2 + gender, data = data.full)
check3 <-  lm(inequality.num ~ factor(year)*party.harm + inequality.variable +
                 cohort2 +
                ses + race2 + gender, data = data.full)
check4 <-  lm(inequality.num ~ factor(year)*cohort2 + inequality.variable +
                party.harm +
                ses + race2 + gender, data = data.full)
check5 <-  lm(inequality.num ~ factor(year)*ses + inequality.variable +
                party.harm +
                cohort2 + race2 + gender, data = data.full)
check6 <-  lm(inequality.num ~ factor(year)*race2 + inequality.variable +
                party.harm +
                cohort2 + ses + gender, data = data.full)

stargazer(check1, check2, check3, check4, check5, check6, omit = "year")

#############################################
#### cohort sensitivity check ###############
#############################################

## This code replicates our cohort analysis
## displayed in figure 2 with an alternative
## specification for cohorts (decades versus
## categorical labels).

## re-ordering cohort variable
## so boomers are base category (born 1945-1954)
data.full$cohort <- factor(data.full$cohort,
                           levels = c("1945-1954",
                                      "Before 1900",
                                      "1900-1914",
                                      "1915-1924",
                                      "1925-1934",
                                      "1935-1944",
                                      "1955-1964",
                                      "1965-1964",
                                      "1965-1974",
                                      "1975-1984",
                                      "1985-1995"))

## use same data set as main figure:
## but also need to exclude one additional year
## so that categories are not dropped
data.cohortintx <- data.full %>% filter(cohort2 != "Before Greatest" &
                                          cohort2 != "Millenials" &
                                          cohort != "1975-1984" & ## additionally
                                          ## excluded a few extra - cohort
                                          ## groupings don't exactly overlap
                                          year >= 1987)
data.cohortintx$cohort <- droplevels(data.cohortintx$cohort)

lm1 <- lm(inequality.num ~ year_5*cohort + party.harm + race2 + ses + gender, data = data.cohortintx)

### simulating draws from the estimated
## distribution of model coefficients, centered
## at the observed estimates
vcov <- vcov(lm1)
betas <- lm1$coefficients

set.seed(08540)
beta.draws <- MASS::mvrnorm(10000, mu = betas, Sigma = vcov)

## create matrix for calculating estimated values
table(data.cohortintx$year_5) ## 6
table(data.cohortintx$cohort) ## 2 - 1945-1954,
## 1965-1974


matrix <- matrix(0, nrow = 12, 
                 ncol = length(beta.draws[1,]))
matrix[,1] <- 1
colnames(matrix) <- names(beta.draws[1,])
rownames(matrix) <- 1:nrow(matrix)

## year - different year for each level
## main year effects:

## for each level:
## each level gets a row of zeros (for first 1966-1970)
## and then a single year 
for(i in 2:6){
  matrix[i, i] <- 1
  matrix[i+6, i] <- 1
} 

## year*cohort interaction effects:
for(i in 2:6){
  matrix[i+6, i + 38] <- 1
}

## main cohort effects:
matrix[6:12, "cohort1965-1974"] <- 1

## other variables:
## set at Republican, ses3, White
matrix[,"ses3"] <- 1
matrix[,"race2Yes"] <- 1
matrix[,"gendermale"] <- 1

## for each draw of coefficients from the
## estimated distribution, we calculate
## estimated values using our matrix

sim <- base::apply(beta.draws, 1, function(x){
  vector <- matrix %*% x
  return(vector)
})
sim <- t(sim)
sim <- as.data.frame(sim)

## calculate 2.5th and 97.5th percentile
## of estimated values for confidence intervals
estimates <- estimates.fct(sim)


estimates$year <- rep(levels(data.cohortintx$year_5)[5:10], 2)
estimates$year_date <- gsub("-[0-9]{4}",
                                "", 
                                estimates$year)
estimates$year_date <- as.Date(paste0(estimates$year_date,
                                          "-01-01",
                                          sep = ""))
estimates$cohort <- c(rep("Boomer: 1945-1954", 6),
                      rep("Gen-X: 1965-1974", 6))

ggplot(estimates,
       aes(x = year_date,
           y = Est,
           color = cohort,
           group = cohort,
           ymin = Lower,
           ymax = Upper)) +
  geom_point() +
  geom_errorbar(width = 0) +
  geom_line() +
  scale_color_manual(values = cbbPalette[c(1,3)]) +
  scale_y_continuous(limits = c(0, 1),
                     labels = scales::percent_format()) +
  theme_classic() +
  theme(legend.position = "bottom") +
  labs(x = NULL,
       y = "Estimated Percnet of Respondents\nWho Think 'Rich Getting Richer'",
       color = NULL)
## saved as figures/cohort_time_alt.pdf
## 5x7



##################################
## Party Descriptive Plot
##################################

## This plot looks at trends in party identification
## in our combined Harris-Pew sample and compares it with
## trends in party identification in the ANES.
## The results of this analysis are included in the
## SI, Section D. 

data.full$year_date <- as.Date(paste0(data.full$year,
                              "-01-01"))

## This code calculates weighted and un-weighted
## percents of respondents in the combined Harris-Pew
## subsample who identify with each party by year.
time_trend_party <- data.full %>%
  filter(!is.na(weight.fulldata)) %>%
  dplyr::group_by(year_date) %>%
  dplyr::summarize(Republican = sum(party.harm == "Republican") / n(),
            `Indep/Other` = sum(party.harm == "Indep/Other") / n(),
            Democrat = sum(party.harm == "Democrat") / n(),
            Republican_Weighted = weighted_proportion(vec = party.harm,
                                                      value = "Republican",
                                                      weight = weight.fulldata),
            Democrat_Weighted = weighted_proportion(vec = party.harm,
                                                      value = "Democrat",
                                                      weight = weight.fulldata),
            `Indep/Other_Weighted` = weighted_proportion(vec = party.harm,
                                                      value = "Indep/Other",
                                                      weight = weight.fulldata))

### not including 1987 in the analysis
## because independents were collapsed into "don't know" for that year
time_trend_party <- time_trend_party %>%
  filter(year_date != "1987-01-01")

## creating long version of the data
time_trend_party <- time_trend_party %>%
  gather(key = "Party",
         value = "Percent",
         -year_date)
## indicator for whether weighted or unweighted
time_trend_party$survey <- ifelse(grepl("Weighted", time_trend_party$Party),
                                        "Harris/Pew Weighted",
                                        "Harris/Pew Unweighted")
time_trend_party$Party <- gsub("_Weighted", "",
                               time_trend_party$Party)

## cleaning ANES party ID
## collapsing no preference/independent/other -> Indep
## as we did with Harris/Pew
anes$Party_ID <- dplyr::recode(anes$Party_ID,
                               `1` = "Republican",
                               `2` = "Indep",
                               `3` = "Indep",
                               `4` = "Indep",
                               `5` = "Democrat",
                               `8` = "DK",
                               `9` = "Refused")
anes$Party_ID[anes$Party_ID == "Refused" |
                anes$Party_ID == "DK"] <- NA
anes$year_date <- as.Date(paste(anes$Year,
                                "-01-01",
                                sep = ""))

sum(is.na(anes$Party_ID)) ## 14136
table(is.na(anes$Party_ID),
      anes$Year) ## missing before 1972

anes <- anes %>%
  filter(year_date >= as.Date("1972-01-01") &
           !is.na(Party_ID))

## calculating weighted proportions in ANES data
time_trend_anes <- anes %>%
  dplyr::group_by(year_date) %>%
  dplyr::summarize(Republican = sum(Party_ID == "Republican") / n(),
                   `Indep/Other` = sum(Party_ID == "Indep") / n(),
                   Democrat = sum(Party_ID == "Democrat") / n(),
                   Republican_Weighted = weighted_proportion(vec = Party_ID,
                                                             value = "Republican",
                                                             weight = Weight),
                   Democrat_Weighted = weighted_proportion(vec = Party_ID,
                                                           value = "Democrat",
                                                           weight = Weight),
                   `Indep/Other_Weighted` = weighted_proportion(vec = Party_ID,
                                                        value = "Indep",
                                                        weight = Weight))

## formatting ANES estimates 
## same as Harris/Pew estimates
time_trend_anes <- time_trend_anes %>%
  gather(key = "Party",
         value = "Percent",
         -year_date)
time_trend_anes$survey <- ifelse(grepl("Weighted", time_trend_anes$Party),
                                  "ANES Weighted",
                                  "ANES Unweighted")
time_trend_anes$Party <- gsub("_Weighted", "",
                              time_trend_anes$Party)


time_trend <- bind_rows(time_trend_party,
                        time_trend_anes)


ggplot(time_trend[time_trend$survey != "Harris/Pew Unweighted" &
                    time_trend$survey != "ANES Unweighted" &
                    time_trend$year_date >= as.Date("1972-01-01") &
                    time_trend$year_date <= as.Date("2013-01-01"),],
       aes(x = year_date, y = Percent,
           lty = survey)) +
  facet_wrap(~ Party) + 
  geom_line() +
  scale_y_continuous("Percent of Respondents", 
                     labels = function(x) paste0(x*100, "%"),
                     limits = c(0, 1)) +
  scale_color_manual(values = 
                       cbbPalette[c(7,6,5)]) +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  labs(x = NULL)

## figures/party_percent.pdf
## 5x7


time_trend %>%
  filter(survey == "ANES Weighted" |
           survey == "Harris/Pew Weighted") %>%
  filter(year_date >= as.Date("1980-01-01") &
           year_date <= as.Date("2013-01-01")) %>%
  dplyr::group_by(year_date,
           Party) %>%
  dplyr::summarize(diff = Percent[1] - Percent[2]) %>%
  dplyr::group_by(Party) %>%
  dplyr::summarize(mean = mean(diff, na.rm = TRUE))
## post 1980
## average difference - 2.54% more Democrats in our sample
## 5.31% less indep/other, 2.8% more Republicans


##################################
##### Alternative Outcome ########
##################################

## In this section we investigate whether 
## our outcome variable "rich are getting richer"
## is a proxy for respondents' perceptions of 
## racial inequality. These results are 
## included in the main text, Figure 7.
## That plot includes estimated values for the 
## percent of respondents who agree the rich 
## are getting richer by year, party ID,
## and whether the respondent thinks Black-White
## inequality has improved or not.

## Here is the racial inequality perceptions
## question, available in the Pew data only:

## "In the past few years there hasn't been much real
#improvement in the position of black people in
#this country" 
data.full$racial_gains_bin <- ifelse(data.full$racial_gains ==
                                       "Feel", 1, 0)

## only Pew has this question,
## so when we restrict data to respondents
## who included this question looking at Pew only.

data_gains <- data.full %>%
  filter(!is.na(racial_gains_bin))

model <- lm(inequality.num ~ +
              factor(year_5)*racial_gains_bin*party.harm + ses + cohort2 + race2 + gender, 
            data = data_gains)
vcov <- vcov(model)


### simulating draws from the estimated
## distribution of model coefficients, centered
## at the observed estimates
## note: see 03_analysis_full_data_ev.R for 
## fuller description of this code and simulation. 
betas <- model$coefficients

set.seed(08540)
beta.draws <- MASS::mvrnorm(10000, mu = betas, Sigma = vcov)

## we create matrix to allow us to calculate estimated
## values for each set of simulated coefficients
length(unique(data_gains$year_5)) ## 6
matrix <- matrix(0, nrow = 36, 
                            ncol = length(betas))
matrix[,1] <- 1
colnames(matrix) <- names(betas)

for(i in 2:6){
  matrix[i, i] <- 1 ## Republican, no
  matrix[i+6, i] <- 1 ## Rebublican, yes
  matrix[i+12, i] <- 1 ## Indep/other, no
  matrix[i+18, i] <- 1  ## Indep/other, yes
  matrix[i+24, i] <- 1 ## Democrat, no
  matrix[i+30, i] <- 1 ## democrat yes
} 

## year*racial bin interaction effects:
for(i in 2:6){
  matrix[i+6, i + 19] <- 1
  matrix[i+18, i + 19] <- 1
  matrix[i+30, i + 19] <- 1
}

## main racial gains:
matrix[c(7:12,
         19:24,
         31:36), "racial_gains_bin"] <- 1

## main party
matrix[13:24, "party.harmIndep/Other"] <- 1
matrix[25:36, "party.harmDemocrat"] <- 1

## year*party interaction effects
for(i in 2:6){
  matrix[i+12, i + 29] <- 1
  matrix[i+18, i + 29] <- 1
  matrix[i+24, i + 24] <- 1
  matrix[i+30, i + 24] <- 1
}

## racial gains - party interaction
matrix[31:36, "racial_gains_bin:party.harmDemocrat"] <- 1
matrix[19:24, "racial_gains_bin:party.harmIndep/Other"] <- 1

## year*party*racial gains interaction effects
for(i in 2:6){
  matrix[i+18, i + 41] <- 1  ## Indep/other, yes
  matrix[i+30, i + 36] <- 1 ## democrat yes
} 


## other variables
## set at Boomers
## white
## ses: bin 3 (not ordered factor)
matrix[,"race2Yes"] <- 1
matrix[,"ses3"] <- 1
matrix[,"gendermale"] <- 1


## for each draw of coefficients from the
## estimated distribution, we calculate
## estimated values using our matrix

sim <- base::apply(beta.draws, 1, function(x){
  vector <- matrix %*% x
  return(vector)
})
sim <- t(sim)
sim <- as.data.frame(sim)

## calculate 2.5th and 97.5th percentile
## of estimated values for confidence intervals
estimates <- estimates.fct(sim)


estimates$year <- rep(unique(data_gains$year_5), 6)
estimates$year_date <- gsub("-[0-9]{4}",
                            "", 
                            estimates$year)
estimates$year_date <- as.Date(paste0(estimates$year_date,
                                      "-01-01",
                                      sep = ""))
estimates$party <- c(rep("Republicans", 12),
                     rep("Indep/Other", 12),
                     rep("Democrats", 12))
estimates$gains <- rep(c(rep("Believe Position of\nBlack People Has Improved", 6),
                     rep("Believe Position of\nBlack People Has Not Improved", 6)), 3)

ggplot(estimates,
       aes(x = year_date,
           y = Est,
           #color = party,
           shape = party,
           lty = party,
           group = party,
           ymin = Lower,
           ymax = Upper)) +
  geom_point() +
  geom_errorbar(width = 0) +
  geom_line() +
  scale_y_continuous(limits = c(0, 1),
                     labels = scales::percent_format()) +
  theme_bw() +
  scale_color_manual(values = c("#2c7fb8",
                                "#31a354",
                                "#f03b20")) +
  facet_wrap(~ gains) +
  theme(legend.position = "bottom") +
  labs(x = NULL,
       y = "Estimated Percnet of Respondents\nWho Think 'Rich Getting Richer'",
       color = NULL,
       shape = NULL)
## saved as figures/racial_gains_party_comparison.pdf
## 5x7
## bw version: 
## figures/racial_gains_party_comparison_bw.pdf


## In the SI, section J we include 
## additional results for this analysis.
## 1) In this plot we examine overall time
## trends for our "racial gains" variable
## versus our "rich get richer" variable. 
## We don't use weighted means here because
## our weights are for the full data set but
## we only have Pew data here. 

## calculating means and standard errors
overall_mean_alt <- data.full %>%
  group_by(year_date) %>%
  summarise(mean_in = mean(inequality.num),
            se_in = sqrt(mean_in * (1-mean_in) / n()),
            lower_in = mean_in + qnorm(.025)*se_in,
            upper_in = mean_in + qnorm(.975)*se_in,
            mean_gains = mean(racial_gains_bin, 
                              na.rm = TRUE),
            se_gains = sqrt(mean_gains * (1-mean_gains) / sum(!is.na(racial_gains_bin))),
            lower_gains = mean_gains + qnorm(.025)*se_gains,
            upper_gains = mean_gains + qnorm(.975)*se_gains)

## creating long version
overall_mean_alt <- overall_mean_alt %>%
  gather(key = "variable", 
         value = "estimate",
         -year_date)
overall_mean_alt$variable_type <- gsub("*(_[a-zA-Z]{1,})",
                                       "\\2", overall_mean_alt$variable)
overall_mean_alt$variable <- gsub("^.*_", "", overall_mean_alt$variable)

overall_mean_alt <- spread(overall_mean_alt,
                           key = "variable_type",
                           value = "estimate")
overall_mean_alt <- overall_mean_alt[complete.cases(overall_mean_alt), ]


colnames(overall_mean_alt)[1] <- "year"

overall_mean_alt$variable <- dplyr::recode(overall_mean_alt$variable,
                                           `gains` = "Percent Who Agree\n'There Hasn't Been Much Improvement in Position of Black People'",
                                           `in` = "Percent Who Agree 'Rich are Getting Richer'")
overall_mean_alt$variable <- factor(overall_mean_alt$variable,
                                    levels = c("Percent Who Agree 'Rich are Getting Richer'",
                                               "Percent Who Agree\n'There Hasn't Been Much Improvement in Position of Black People'"))

ggplot(overall_mean_alt,
       aes(x = year,
           y = mean,
           color = variable,
           fill = variable)) +
  geom_point() +
  geom_ribbon(mapping = aes(ymin = lower,
                            ymax = upper),
              alpha = .2) +
  theme_bw() +
  coord_cartesian(xlim = c(as.Date("1966-01-01"), 
                           as.Date("2013-01-01"))) +
  scale_y_continuous(NULL, labels = scales::percent_format()) +
  scale_color_manual(name = NULL, values = 
                       cbbPalette[c(2:4)]) +
  scale_fill_manual(name = NULL,
                    values = 
                      cbbPalette[c(2:4)]) +
  scale_linetype_discrete(name = "Actual Inequality") +
  theme(legend.direction = "vertical", legend.position = "bottom") +
  geom_vline(mapping = aes(xintercept = as.Date("1991-3-3")),
             lty = 2) +
  annotate(x = as.Date("1980-1-1"),
           y = .55,
           geom = "label",
           label = "1992 Rodney King\nBeating") +
  annotate(x = as.Date("1980-1-1"),
           xend = as.Date("1991-3-3"),
           y = .6,
           yend = .65,
           geom = "curve",
           curvature = -.1,
           arrow = arrow(length = unit(0.03, "npc"))) +
  labs(x = NULL)
## figures/overall_plot_racial.pdf
## 5x7

## 2) We also examine the correlation between
## our two outcome variables ("rich get richer" and
## whether there has been improvement in the 
## position of Black people). Also included in SI,
## section J. 

## To calculate the Phi coefficient we first
## sum the number in each join cell. 
## We multiply each sum by a scalar because
## otherwise the phi coefficient denominator is too large to compute. 
## if you add a scalar the coefficient is equivalent
## we checked by proof.

coefficient <- data_gains %>%
  dplyr::group_by(year) %>%
  dplyr::summarize(n_1_1 = sum(racial_gains_bin == 1 &
                                 inequality.num == 1) * .2,
                   n_0_0 = sum(racial_gains_bin == 0 &
                                 inequality.num == 0)* .2,
                   n_1_0 = sum(racial_gains_bin == 1 &
                                 inequality.num == 0)* .2,
                   n_0_1 = sum(racial_gains_bin == 0 &
                                 inequality.num == 1)* .2,
                   n_inequality_1 = sum(inequality.num == 1)* .2,
                   n_inequality_0 = sum(inequality.num == 0)* .2,
                   n_gains_0 = sum(racial_gains_bin == 0)* .2,
                   n_gains_1 = sum(racial_gains_bin == 1)* .2,
                   cor = cor(racial_gains_bin, inequality.num))

coefficient$numerator <- (coefficient$n_1_1*coefficient$n_0_0 -
                            coefficient$n_0_1*coefficient$n_1_0)
coefficient$denominator <- sqrt(coefficient$n_inequality_1 *
                                  coefficient$n_inequality_0 *
                                  coefficient$n_gains_0 *
                                  coefficient$n_gains_1)
coefficient$phi <- coefficient$numerator / coefficient$denominator

ggplot(coefficient,
       mapping = aes(x = year,
                     y = phi)) +
  geom_point() + 
  geom_line() +
  theme_bw() +
  scale_y_continuous(limits = c(-1, 1)) +
  geom_hline(mapping = aes(yintercept = 0),
             lty = 2,
             color = "red") +
  annotate(geom = "label",
           label = "No Relationship",
           x = 2005,
           y = -.1) +
  annotate(geom = "label",
           label = "Perfect Positive Correlation",
           x = 2005,
           y = .9) +
  annotate(geom = "label",
           label = "Perfect Negative Correlation",
           x = 2005,
           y = -.9) +
  labs(x = NULL,
       y = "Phi Coefficient:\nPerceptions of Distributional Inequality and Racial Inequality")
## figures/phi_racial_distributional
## 5x7


#######################################
## State level inequality #############
#######################################

## In the main text, section 2.1 we include
## in a footnote the comment that all states experienced
## growing within state inequality from 1990 to 2007. 
## We provide evidence for this statement here. 

check <- gini_data %>% 
  filter(Year >= 1990) %>%
  filter(Year %in% c(1990, 2007)) %>%
  group_by(State) %>%
  dplyr::summarise(diff = Gini[Year == 2007] -
                     Gini[Year == 1990]) 

summary(check$diff[check$State != "United States"])
sum(check$diff > 0)
check[check$diff <= 0,] ## all states but Alaska and Hawaii
## experienced increasing within state inequality


############################
### Question Placement #####
############################

## In the SI, section B
## we investigate whether our findings regarding
## partisan polarization are sensitive to the 
## order of question placement: whether
## the inequality question was asked before or 
## after the partisan identification question.

## here is the variable we created
## based on survey schedules:
table(data.full$question_place)


## The code below estimates the mean percent
## of respondents who agreed the "rich were getting richer"
## by year, party ID, and question placement

question_place_data <- data.full %>%
  filter(!is.na(question_place) &
           question_place != "questionnaire missing") %>%
  filter(year_5 != "1971-1975") ## don't include
## this year because there was no variation that year. 


lm <- lm(inequality.num ~ year_5*party.harm*question_place + race2 + ses + cohort2 + gender, 
          data = question_place_data)

vcov <- vcov(lm)
betas <- lm$coefficients

## create matrix for estimated value calculation
matrix_place <- matrix(0, nrow = 54, 
                            ncol = length(betas))
matrix_place[,1] <- 1
colnames(matrix_place) <- names(betas)
rownames(matrix_place) <- 1:nrow(matrix_place)

## year - different year for each level
## main year effects:

## for each level:
## each level gets a row of zeros (for first 1966-1970)
## and then a single year 

for(i in 2:9){
  matrix_place[i, i] <- 1 ## Republican, after  1:9
  matrix_place[i+9, i] <- 1 ## Republican, before 10:18
  matrix_place[i+18, i] <- 1 ## Democrat, after 19:27
  matrix_place[i+27, i] <- 1 ## Democrat, before 28:36
  matrix_place[i+36, i] <- 1 ## Indep, after 37:45
  matrix_place[i+45, i] <- 1 ## Indep, beore 46:54
} 


## year*party interaction effects:
for(i in 2:9){
  matrix_place[i+18, i+22] <- 1 ## Democrat, after
  matrix_place[i+27, i+22] <- 1 ## Democrat, before
  matrix_place[i+36, i+30] <- 1 ## Indep, after
  matrix_place[i+45, i+30] <- 1 ## Indep, beore
} 


## main party effects:
## Republican, Republican 
matrix_place[19:27, "party.harmDemocrat"] <- 1
matrix_place[28:36, "party.harmDemocrat"] <- 1
matrix_place[37:45, "party.harmIndep/Other"] <- 1
matrix_place[46:54, "party.harmIndep/Other"] <- 1

## main question place effects
matrix_place[10:18, "question_placebefore party"] <- 1
matrix_place[28:36, "question_placebefore party"] <- 1
matrix_place[46:54, "question_placebefore party"] <- 1

## other variables
## set at Boomers
## white
## ses: bin 3 (not ordered factor)
## alienation set at 2
matrix_place[,"race2Yes"] <- 1
matrix_place[,"ses3"] <- 1
matrix_place[,"gendermale"] <- 1

## year:question place interactions
for(i in 2:9){
  matrix_place[i+9, i+38] <- 1 ## Republican, before 10:18
  matrix_place[i+27, i+38] <- 1 ## Democrat, before 28:36
  matrix_place[i+45, i+38] <- 1 ## Indep, beore 46:54
} 


## year: question place : party interaction
for(i in 2:9){
  matrix_place[i+27, i+48] <- 1 ## Democrat, before 28:36
  matrix_place[i+45, i+56] <- 1 ## Indep, beore 46:54
} 

### simulating draws from the estimated
## distribution of model coefficients, centered
## at the observed estimates
set.seed(08540)
beta.draws <- MASS::mvrnorm(10000, mu = betas, Sigma = vcov)


## for each draw of coefficients from the
## estimated distribution, we calculate
## estimated values using our matrix
sim <- base::apply(beta.draws, 1, function(x){
  vector <- matrix_place %*% x
  
  return(vector)
})
sim <- t(sim)
sim <- as.data.frame(sim)

## calculate 2.5th and 97.5th percentile
## of estimated values for confidence intervals
lm_estimates <- estimates.fct(sim)

question_place_data$year_5 <- droplevels(question_place_data$year_5)

lm_estimates$year <- rep(levels(question_place_data$year_5), 6)
lm_estimates$party <- c(rep(levels(data.full$party.harm)[1], 18), 
                         rep(levels(data.full$party.harm)[2], 18),
                         rep(levels(data.full$party.harm)[3], 18))
lm_estimates$question_place <- rep(c(rep("Perceptions Asked\nAfter Party ID", 9),
                                 rep("Perceptions Before\nAfter Party ID", 9)), 3)

lm_estimates$year_date <- gsub("-[0-9]{4}",
                        "", 
                        lm_estimates$year)
lm_estimates$year_date <- as.Date(paste0(lm_estimates$year_date,
                                  "-01-01",
                                  sep = ""))


ggplot(lm_estimates,
       aes(x = year_date,
           y = Est,
           lty = question_place,
           group = question_place,
           ymin = Lower,
           ymax = Upper)) +
  geom_point(alpha = .4) +
  geom_errorbar(width = 0) +
  geom_line() +
  facet_wrap(~ party) +
  scale_color_viridis(discrete = TRUE,
                      option = "plasma") +
  scale_y_continuous(limits = c(0,1),
                     labels = scales::percent_format()) +
  theme_classic() +
  labs(x = NULL,
       y = "Estimated Percent of Respondents\nWho Think 'Rich Getting Richer'",
       lty = NULL) +
  theme(legend.position = "bottom")
## saved as figures/question_place_comparison_comparison.pdf
## 5 x 7


## This plot looks at the share of respondents
## who answered the inequality question over time.

table(data.full$question_place,
      data.full$year_5)
plot <- data.full %>% 
  filter(!is.na(question_place) &
           question_place != "questionnaire missing") %>%
  group_by(year_5) %>%
  dplyr::summarize(after = sum(question_place == "after party"),
                   before = sum(question_place != "after party"),
                   perc = after / n()) 

ggplot(plot,
       mapping = aes(x = factor(year_5),
                     y = perc,
                     group = 1)) +
  geom_point() + 
  geom_line() +
  theme_bw() +
  scale_y_continuous(limits = c(0,1),
                     labels = scales::percent_format()) +
  labs(x = NULL,
       y = "Percent of Respondents Who Answered\nInequality Question After Party ID Question")
## saved as figures/question_place_percent.pdf
## 5x7

